import pandas as pd
import plotly as plt
import numpy as np
import plotly.express as px
# Load the wide tick table; the 'timestamp' column becomes the index.
data = pd.read_csv('all_ticks_wide.csv')
# Convert the timestamps to Istanbul time, then drop the timezone so the
# index holds naive local timestamps.
local_timestamps = (
    pd.to_datetime(data['timestamp'])
    .dt.tz_convert('Europe/Istanbul')
    .dt.tz_localize(None)
)
data = data.drop(columns='timestamp').set_index(local_timestamps)
# Symbols analysed in the rest of the script.
stocks = [
    'ASELS',
    'THYAO',
    'PGSUS',
    'BANVT',
    'MGROS',
    'FROTO',
]
# Returns the rows of the selected stocks that fall inside a date window
# (by default the years 2015 and 2016).
def data_preparation(data, stocks, first_day='2014-12-31', last_day='2017-01-01'):
    """Return the ``stocks`` columns of ``data`` between two dates.

    Both bounds are exclusive (strict ``>`` and ``<`` comparisons against the
    index). A date-only string is compared as midnight of that day, so rows
    timestamped later during ``first_day`` itself are still included.

    Args:
        data: DataFrame whose index can be compared with date strings.
        stocks: List of column names to keep.
        first_day: Exclusive lower bound of the window.
        last_day: Exclusive upper bound of the window.

    Returns:
        A DataFrame holding only the rows inside the window and only the
        requested columns.
    """
    in_window = (data.index > first_day) & (data.index < last_day)
    return data.loc[in_window, stocks]
# Builds the daily closing prices of one stock and their day-over-day
# percent change.
def stocks_data(symbol, data):
    """Return a per-day table of closes and percent changes for ``symbol``.

    Args:
        symbol: Name of the column of ``data`` to analyse.
        data: DataFrame with a datetime index and one column per stock.

    Returns:
        A DataFrame indexed by calendar date (``datetime.date`` objects) with
        the columns 'Close' (last value of each day), 'Month', 'Year',
        'Symbol' and 'Change' (percent change versus the previous row's
        close; NaN for the first row).
    """
    # The close of a day is the last value recorded on that calendar date.
    daily_close = data.groupby(data.index.date)[symbol].last()
    df = pd.DataFrame({'Close': daily_close})

    # The grouped index holds plain date objects; convert a copy of it to
    # read the month and year without changing the index of df.
    calendar = pd.to_datetime(daily_close.index)
    df['Month'] = calendar.month
    df['Year'] = calendar.year
    df['Symbol'] = symbol

    previous_close = df['Close'].shift(1)
    df['Change'] = (df['Close'] - previous_close) / previous_close * 100
    return df
# Outlier detection with the IQR rule; each month's data is also plotted.
def outliers(outliers_df, data, i, j, symbol_name, end_year=2017):
    """Collect the monthly IQR outliers of 'Change' and plot every month.

    Starting at month ``i`` of year ``j``, each calendar month before
    ``end_year`` is handled in order: rows of that month whose 'Change' lies
    above Q3 + 1.5 * IQR or below Q1 - 1.5 * IQR are collected, and the
    month's rows are passed to ``plot``. Months without any rows are skipped
    (nothing is collected or plotted for them).

    Args:
        outliers_df: DataFrame of outliers gathered so far; the rows found
            here are placed after it.
        data: DataFrame with 'Month', 'Year' and 'Change' columns.
        i: Month number to start from.
        j: Year to start from.
        symbol_name: Forwarded to ``plot`` as the figure title.
        end_year: First year that is not processed.

    Returns:
        ``outliers_df`` followed by the outlier rows found, or ``outliers_df``
        itself when there is nothing to combine.
    """
    month, year = i, j
    collected = [outliers_df]
    while year < end_year:
        monthly_df = data[(data['Month'] == month) & (data['Year'] == year)]
        if not monthly_df.empty:
            change = monthly_df['Change']
            # Interquartile range fences.
            q1 = change.quantile(0.25)
            q3 = change.quantile(0.75)
            iqr = q3 - q1
            upper = q3 + 1.5 * iqr
            lower = q1 - 1.5 * iqr
            collected.append(monthly_df[(change > upper) | (change < lower)])
            # The data is split by month only here, so plotting is done here too.
            plot(monthly_df, symbol_name)
        # Advance to the next calendar month, rolling over after December.
        if month >= 12:
            month, year = 1, year + 1
        else:
            month += 1

    # DataFrame.append no longer exists in pandas 2.x; concatenate once at the
    # end, leaving out empty frames.
    non_empty = [frame for frame in collected if not frame.empty]
    if not non_empty:
        return outliers_df
    return pd.concat(non_empty)
# Plot function
def plot(data, symbol_name):
    """Show a line chart of the 'Change' column against the index of ``data``.

    The x-axis title carries the first and last index values of ``data``.
    Nothing is drawn when ``data`` has no rows.

    Args:
        data: DataFrame with a 'Change' column.
        symbol_name: Text used as the figure title.
    """
    if len(data.index) == 0:
        # Nothing to draw, and data.index[0] below would raise IndexError.
        return
    first_label = str(data.index[0])
    last_label = str(data.index[-1])
    fig = px.line(x=data.index, y=data['Change'], title=symbol_name, markers=True)
    fig.update_layout(
        xaxis_title=f"Time Horizon ({first_label}/{last_label})",
        yaxis_title="Daily Percent Change of Closes",
    )
    fig.show()
# Restrict the tick data to the selected stocks and the analysis window.
six_stocks_data = data_preparation(data, stocks)
# Every outlier found, for every stock, is accumulated in this dataframe.
outliers_df = pd.DataFrame()
for k in stocks:
    # Daily closes and percent changes of the current stock.
    last_data = stocks_data(k, six_stocks_data)
    # Scan month by month starting from January 2015.
    outliers_df = outliers(outliers_df, last_data, 1, 2015, k)
# Bare expression: displays the result when run as a notebook cell.
outliers_df
| | Close | Month | Year | Symbol | Change |
|---|---|---|---|---|---|
| 2015-03-16 | 5.9430 | 3 | 2015 | ASELS | 3.405077 |
| 2015-06-08 | 6.2186 | 6 | 2015 | ASELS | -8.361332 |
| 2015-06-24 | 7.1810 | 6 | 2015 | ASELS | 5.054495 |
| 2015-09-15 | 6.8355 | 9 | 2015 | ASELS | 4.922638 |
| 2015-10-16 | 7.0576 | 10 | 2015 | ASELS | -4.026544 |
| ... | ... | ... | ... | ... | ... |
| 2016-09-16 | 25.3739 | 9 | 2016 | FROTO | -4.224135 |
| 2016-09-22 | 26.8713 | 9 | 2016 | FROTO | 3.353936 |
| 2016-09-26 | 25.9334 | 9 | 2016 | FROTO | -3.312592 |
| 2016-12-01 | 23.5737 | 12 | 2016 | FROTO | -3.742706 |
| 2016-12-06 | 25.3389 | 12 | 2016 | FROTO | 5.438166 |
173 rows × 5 columns